package com.zhang.service;

import com.zhang.pojo.Article;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.URL;

/**
 * Reads one entry from a news list page and turns it into an {@link Article}.
 *
 * <p>The list page is fetched with jsoup, the {@code <li>} items under the
 * element with id {@code wp_news_w6} are collected, and the requested item is
 * converted into an {@link Article} whose body is loaded via {@link ArticleParse}.
 */
public class HtmlParse {

    /** Id of the element that wraps the news list items on the list page. */
    private static final String NEWS_LIST_ID = "wp_news_w6";

    /** Timeout, in milliseconds, applied when fetching the list page. */
    private static final int FETCH_TIMEOUT_MILLIS = 180000;

    Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Fetches the list page at {@code url} and builds an {@link Article} from
     * its {@code i}-th list item.
     *
     * @param url address of the news list page
     * @param i   1-based position of the wanted {@code <li>} item inside the list
     * @return the parsed article, or {@code null} when the article body returned
     *         by {@link ArticleParse#parseArticle} is {@code null} or empty
     * @throws IllegalStateException     if the page has no element with id
     *                                   {@code wp_news_w6}
     * @throws IndexOutOfBoundsException if {@code i} is smaller than 1 or larger
     *                                   than the number of list items
     * @throws Exception                 if the URL is malformed, the page cannot
     *                                   be fetched, or the article body cannot
     *                                   be parsed
     */
    public Article parseHtml(String url, int i) throws Exception {
        Document document = Jsoup.parse(new URL(url), FETCH_TIMEOUT_MILLIS);

        // getElementById returns null when the id is absent; fail with context
        // instead of a bare NullPointerException on the chained call.
        Element newsList = document.getElementById(NEWS_LIST_ID);
        if (newsList == null) {
            throw new IllegalStateException(
                    "No element with id '" + NEWS_LIST_ID + "' found at " + url);
        }

        Elements items = newsList.getElementsByTag("li");
        if (i < 1 || i > items.size()) {
            throw new IndexOutOfBoundsException(
                    "Item index " + i + " is outside 1.." + items.size() + " for " + url);
        }

        // Look the item up once; the index is 1-based for callers.
        Element item = items.get(i - 1);
        Elements links = item.getElementsByTag("a");

        // Title: "title" attribute of the first link in the item.
        String articleTitle = links.eq(0).attr("title");
        // Source of the article: text of the first <span> in the item.
        String source = item.getElementsByTag("span").eq(0).text();
        // Link to the article: "href" attribute of the first link, used as written in the page.
        // NOTE(review): the value may be a relative URL - confirm ArticleParse resolves it.
        String href = links.eq(0).attr("href");
        // Publication time: text of the element(s) carrying the "news_meta" class.
        String pubtime = item.getElementsByClass("news_meta").text();

        // Full body of the article behind the link.
        String content = new ArticleParse().parseArticle(href);

        // A null body is treated like an empty one rather than crashing on equals().
        if (content == null || content.isEmpty()) {
            logger.info("【警告】链接异常{}", articleTitle);
            return null;
        }
        logger.info("【正常】链接正常{}", articleTitle);
        return new Article(href, articleTitle, pubtime, source, content);
    }
}